#' =============================================================================
#' FILE: 02_cueing_data.R
#' DESCRIPTION:
#'   Processes and recodes survey data from the Peru questionnaire, including
#'   political identities, identity scales, cueing experiment variables, and
#'   covariates. Produces the main datasets for subsequent analysis scripts.
#'
#'   Also produces Table A3 (Supplemental Material): fit indices for the CFA models.
#'
#' PACKAGES REQUIRED: pacman, tidyverse, lavaan
#'
#' OUTPUTS:
#'   - 04_outputs/clean_dataset.rds
#'   - 04_outputs/clean_dataset_reshaped.rds
# =============================================================================

# Packages and dataset ---------------------------------------------------------

# Required packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, 
               lavaan)

# Import data
# Reads the raw questionnaire export with read.csv() defaults into `df`, the
# data frame that every later section of this script modifies in place.
# NOTE(review): the recodes below compare against accented Spanish labels
# (e.g. "Perú Libre"); presumably the file must be read as UTF-8 for those
# comparisons to match -- confirm `fileEncoding` on non-UTF-8 platforms.
df <- read.csv("02_data/questionnaire_peru_June 15, 2023_12.15.csv")

# Recoding Political Identities ------------------------------------------------

# PARTISAN IDENTITIES
# Builds df$party_id from the two closed-ended party questions (pol_01,
# pol_02) and their free-text "other" fields (pol_01_14_TEXT, pol_02_14_TEXT).
# The assignments run in order, so a later match overrides an earlier one.
# Respondents matching no rule keep NA. `%in%` is used instead of chained `==`
# so that missing values can never reach the logical subscript.
## Clean variables before imputing nones.
df$pol_01_14_TEXT <- trimws(df$pol_01_14_TEXT)
df$pol_02_14_TEXT <- trimws(df$pol_02_14_TEXT)
df$party_id <- NA

# TRUE for rows naming a party: `label` is matched against the closed-ended
# answers, `free_text` against the trimmed free-text answers.
is_party <- function(label, free_text = label) {
  df$pol_01 %in% label |
    df$pol_02 %in% label |
    df$pol_01_14_TEXT %in% free_text |
    df$pol_02_14_TEXT %in% free_text
}

## Nones: explicit "no party"/"don't know" answers, free-text variants
## (including misspellings) of "ninguno", an "other" choice left blank, or an
## empty pol_02.
df$party_id[
  df$pol_02 %in% c("No se siente cercano a ningún partido político",
                   "No sabe",
                   "") |
    df$pol_01_14_TEXT %in% c("Nibguno",
                             "ninguno",
                             "Ninguno",
                             "NINGUNO",
                             "Ninguna",
                             "Ningún partido político me convence con sus ideas",
                             "Ningino") |
    df$pol_02_14_TEXT %in% "Ninguno" |
    (df$pol_01 %in% "Otro. ¿Cuál?" & df$pol_01_14_TEXT %in% "") |
    (df$pol_02 %in% "Otro. ¿Cuál?" & df$pol_02_14_TEXT %in% "")
] <- "none"

## Fuerza Popular
df$party_id[is_party("Fuerza Popular")] <- "FP"
## Renovacion Popular
df$party_id[is_party("Renovación Popular")] <- "RP"
## Peru Libre (the free-text variant is lower-case and unaccented)
df$party_id[is_party("Perú Libre", free_text = "peru libre")] <- "PL"
## Accion Popular
df$party_id[is_party("Acción Popular")] <- "AP"
## Alianza para el Progreso
df$party_id[is_party("Alianza para el Progreso")] <- "APP"
## Juntos por el Peru
df$party_id[is_party("Juntos por el Perú")] <- "JP"
## Avanza Pais
df$party_id[is_party("Avanza País")] <- "AvP"
## Podemos Peru
df$party_id[is_party("Podemos Perú")] <- "PP"
## Partido Morado
df$party_id[is_party("Partido Morado")] <- "PM"
## Somos Peru
df$party_id[is_party("Somos Perú")] <- "SP"

## Other: parties that only appear as free-text answers
df$party_id[
  df$pol_01_14_TEXT %in% c("apra",
                           "Apra",
                           "APRA",
                           "Cajamarca siempre verde",
                           "CORAZON PATRIOTA",
                           "Partido popular cristiano",
                           "Partido Popular Cristiano",
                           "Ppc",
                           "PPC",
                           "PPP",
                           "Social democrático",
                           "Frente Esperanza",
                           "MAS CALLAO",
                           "Alianza gobierno unidad y acción",
                           "Frepap",
                           "Partido popular cristiano PPC",
                           "Perú posible",
                           "Tradicion arequipa",
                           "Móvil",
                           "Izquierda unida",
                           "Movimiento de poder ciudadano") |
    df$pol_02_14_TEXT %in% c("APRA",
                             "Partido patriótico del Perú")
] <- "other"

# Collapse small parties into "other". Per the original notes, PM and PP had
# fewer than 20 respondents and APP, AvP, JP, RP and SP fewer than 100; those
# counts are not checked by this script. After this step party_id takes the
# values "none", "FP", "PL", "AP", "other" or NA.
df$party_id[df$party_id %in% c("PM", "PP",
                               "APP", "AvP", "JP", "RP", "SP")] <- "other"

# MOVEMENT-BASED IDENTITIES
# df$cha_id is derived from pol_06 and pol_07. Rules are applied in order, so
# "Fujimorismo" overrides "Anti-Fujimorismo" and a "none"-type answer in
# pol_07 overrides both. Rows matching no rule stay NA.
df$cha_id <- NA
df$cha_id[df$pol_06 %in% "Anti-Fujimorismo" |
            df$pol_07 %in% "Anti-Fujimorismo"] <- "Anti-Fujimorismo"
df$cha_id[df$pol_06 %in% "Fujimorismo" |
            df$pol_07 %in% "Fujimorismo"] <- "Fujimorismo"
df$cha_id[df$pol_07 %in% c("Nenhum", "Ninguno", "No sabe", "Não sei")] <- "none"

# IDEOLOGICAL IDENTITIES
# df$ido_id is derived from pol_04 and pol_05; both Spanish and Portuguese
# answer labels are accepted. Rules are applied in the order right, left,
# center, none, so a later match overrides an earlier one. Rows matching no
# rule stay NA.
df$ido_id <- NA
df$ido_id[df$pol_04 %in% c("Derecha", "Direita",
                           "Centro-direita", "Centroderecha") |
            df$pol_05 %in% c("Derecha", "Direita",
                             "Centro-direita", "Centroderecha")] <- "right"
df$ido_id[df$pol_04 %in% c("Esquerda", "Izquierda",
                           "Centro-esquerda", "Centroizquierda") |
            df$pol_05 %in% c("Esquerda", "Izquierda",
                             "Centro-esquerda", "Centroizquierda")] <- "left"
df$ido_id[df$pol_04 %in% "Centro" |
            df$pol_05 %in% "Centro"] <- "center"
df$ido_id[df$pol_05 %in% c("Não sei", "Nenhum", "Ninguno", "No sabe")] <- "none"

# Fix the level order used downstream: left < center < right < none.
df$ido_id <- factor(df$ido_id, levels = c("left", "center", "right", "none"))

# IDENTITY SCALES --------------------------------------------------------------

# Recoding function
# Converts one identity-scale item to a numeric 1-6 score.
#
# Args:
#   x: vector of raw item responses. The labelled endpoints
#      ("1. Muy en desacuerdo" / "1. Discordo firmemente" and
#      "6. Muy de acuerdo" / "6. Concordo firmemente") become 1 and 6,
#      "No sabe" / "Não sei" become NA, and every other value is passed to
#      as.numeric() (so "2".."5" become numbers and "" becomes NA).
#   y: vector, same length as x, holding the respondent's identity category.
#
# Returns:
#   A numeric vector the length of x. Rows where y is "none" are imputed as 1.
#   Rows where y is NA keep their own response: the previous
#   ifelse(y == "none", ...) returned NA for them and silently discarded
#   valid answers.
re_iden <- function(x, y) {
  score <- as.character(x)
  score[x %in% c("1. Muy en desacuerdo", "1. Discordo firmemente")] <- "1"
  score[x %in% c("6. Muy de acuerdo", "6. Concordo firmemente")] <- "6"
  score[x %in% c("No sabe", "Não sei")] <- NA_character_
  # Variable to numeric
  score <- as.numeric(score)
  # Impute variable as 1 if identity == none; %in% treats NA in y as
  # "not none" instead of propagating it.
  score[y %in% "none"] <- 1
  score
}


## Recode the six items of each identity scale with re_iden(); the second
## argument is the identity category used for the "none" -> 1 imputation.
## Partisan Identity scale: pid_1..pid_6 -> pid_1r..pid_6r
df[paste0("pid_", seq_len(6), "r")] <-
  lapply(df[paste0("pid_", seq_len(6))], re_iden, y = df$party_id)
## Ideological: iid_1..iid_6 -> iid_1r..iid_6r
df[paste0("iid_", seq_len(6), "r")] <-
  lapply(df[paste0("iid_", seq_len(6))], re_iden, y = df$ido_id)
## Movement-based: pei_1..pei_6 -> cid_1r..cid_6r
df[paste0("cid_", seq_len(6), "r")] <-
  lapply(df[paste0("pei_", seq_len(6))], re_iden, y = df$cha_id)

# CONFIRMATORY FACTOR ANALYSIS ------------------------------------------------- 

# 1 Factor Model
# All 18 recoded items (partisan pid_*r, ideological iid_*r, movement cid_*r)
# load on a single latent factor. This is the comparison model for Table A3.
m_1 <- '
pid_sc =~ pid_1r + pid_2r + pid_3r + pid_4r + pid_5r + pid_6r +
iid_1r + iid_2r + iid_3r + iid_4r + iid_5r + iid_6r + 
cid_1r + cid_2r + cid_3r + cid_4r + cid_5r + cid_6r
'
# Fit
m_1p_fit <- sem(m_1,
                missing = "Ml", # ML handling of missing item responses. NOTE(review): lavaan documents this value as "ml" -- confirm the mixed-case spelling is accepted by the installed version.
                data = df)
## Summary and fit: keep only the three indices reported in the table
fit_1p <- fitMeasures(m_1p_fit, c("cfi", "rmsea", "srmr")) 

# 3-Factor Model
# One latent factor per identity scale (pid_sc, ido_sc, cha_sc), with the
# three factor covariances written out explicitly and residual covariances
# between consecutive item pairs (1-2, 3-4, 5-6) within each scale.
m_3 <- '
pid_sc =~ pid_1r + pid_2r + pid_3r + pid_4r + pid_5r + pid_6r
ido_sc =~ iid_1r + iid_2r + iid_3r + iid_4r + iid_5r + iid_6r
cha_sc =~ cid_1r + cid_2r + cid_3r + cid_4r + cid_5r + cid_6r
pid_sc ~~ ido_sc
pid_sc ~~ cha_sc
cha_sc ~~ ido_sc
pid_1r ~~ pid_2r
pid_3r ~~ pid_4r
pid_5r ~~ pid_6r
iid_1r ~~ iid_2r
iid_3r ~~ iid_4r
iid_5r ~~ iid_6r
cid_1r ~~ cid_2r
cid_3r ~~ cid_4r
cid_5r ~~ cid_6r
'
## Fit (same estimator options as the 1-factor model; m_3p_fit is reused
## below to predict the factor scores)
m_3p_fit <- sem(m_3,
                missing = "Ml", # ML handling of missing item responses (see note on the 1-factor fit)
                data = df)
## Summary and fit
fit_3p <- fitMeasures(m_3p_fit, c("cfi", "rmsea", "srmr"))

# Table A3 (Supplemental Material)
# One row per fit index, one column per model; the index names are taken from
# the 1-factor result and both results request the same indices in the same
# order.
fit_table <- data.frame(
  Fit_Index = names(fit_1p),
  Model_1P = as.numeric(fit_1p),
  Model_3P = as.numeric(fit_3p)
)

# Print the table
print(fit_table)

# CREATE IDENTITY VARIABLES ----------------------------------------------------

# Predict from CFA model
# Appends the factor scores of the 3-factor model (pid_sc, ido_sc, cha_sc) to
# df. lavaan can drop cases that have no observed indicators, in which case
# lavPredict() returns fewer rows than df and a plain cbind() would fail or
# misalign. The scores are therefore written back through the index of the
# cases lavaan actually used; any dropped row keeps NA scores. When no case is
# dropped the result equals the previous cbind().
cfa_scores <- as.data.frame(lavPredict(m_3p_fit))
cfa_rows <- lavInspect(m_3p_fit, "case.idx")
df[names(cfa_scores)] <- NA_real_
df[cfa_rows, names(cfa_scores)] <- cfa_scores

# Scale from 0-1
# Min-max rescaling of a numeric vector.
#
# Args:
#   x: numeric vector; NAs are ignored when finding the minimum and maximum
#      and are returned as NA.
#
# Returns:
#   A numeric vector the length of x with the observed minimum mapped to 0
#   and the observed maximum mapped to 1.
scale_01 <- function(x) {
  bounds <- range(x, na.rm = TRUE)
  (x - bounds[1]) / (bounds[2] - bounds[1])
}

# Rescale the three CFA factor scores to 0-1: *_sc -> *_st
df[c("pid_st", "ido_st", "cha_st")] <-
  lapply(df[c("pid_sc", "ido_sc", "cha_sc")], scale_01)

# CUEING EXPERIMENT VARIABLES --------------------------------------------------

# Clean variables function
# Merges the treated (x) and control (y) versions of one cue item into a
# single numeric response.
#
# Args:
#   x, y: vectors of raw responses. All non-digit characters are stripped from
#         each, the remaining digits are pasted together (x first), and the
#         result is converted to numeric. In the calls in this script x is the
#         `_t` column and y the `_c` column of the same item.
#
# Returns:
#   A numeric vector; NA where neither input contains a digit.
#
# NA inputs are treated as empty strings. Previously paste0() turned an NA
# into the text "NA", so a missing value in one arm destroyed a valid answer
# in the other.
clean_var <- function(x, y) {
  digits_only <- function(v) {
    v <- gsub("\\D+", "", as.character(v))
    v[is.na(v)] <- ""
    v
  }
  as.numeric(paste0(digits_only(x), digits_only(y)))
}

## CUES 1-6, arms a-f
## For every cue (1..6) and arm (a..f), combine the `_t` and `_c` response
## columns into one numeric column cues_<cue><arm> with clean_var(). The new
## columns are created in the order 1a, 1b, ..., 1f, 2a, ..., 6f.
cue_arms <- paste0(rep(seq_len(6), each = 6), letters[seq_len(6)])
df[paste0("cues_", cue_arms)] <- Map(
  clean_var,
  df[paste0("cues_", cue_arms, "_t")],
  df[paste0("cues_", cue_arms, "_c")]
)

# MERGE VAR
# For each cue, take the first non-missing response across arms a..f (in that
# order) and store it as cues_1 .. cues_6.
df[paste0("cues_", seq_len(6))] <- lapply(seq_len(6), function(cue) {
  arm_cols <- paste0("cues_", cue, letters[seq_len(6)])
  do.call(coalesce, unname(as.list(df[arm_cols])))
})
# TREATMENT VARIABLE
## FUNCTION
# Flags whether any of the six treated-version response fields is filled in.
#
# Args:
#   at, bt, ct, dt, et, ft: vectors (one per arm a..f) of raw responses to the
#     treated version of a cue.
#
# Returns:
#   A numeric vector: 1 if at least one field is a non-empty string, else 0.
#
# NA counts as "not filled in". Previously an NA in any arm made otherwise
# untreated rows NA instead of 0 (for example when read.csv() imports an
# all-empty column as logical NA).
treat_var <- function(at, bt, ct, dt, et, ft) {
  filled <- function(v) !is.na(v) & v != ""
  any_filled <- filled(at) | filled(bt) | filled(ct) |
    filled(dt) | filled(et) | filled(ft)
  ifelse(any_filled, 1, 0)
}

## CUE 1-6
## treat_<cue> is 1 when any of the six `_t` (treated-version) columns of that
## cue holds a response, as decided by treat_var().
df[paste0("treat_", seq_len(6))] <- lapply(seq_len(6), function(cue) {
  treated_cols <- paste0("cues_", cue, letters[seq_len(6)], "_t")
  do.call(treat_var, unname(as.list(df[treated_cols])))
})

# CONSISTENT OR INCONSISTENT CUE
# con_<cue> compares the respondent's group (df$group_2) with the side of the
# cue they were treated with:
#   "1_consistent"   treated and group_2 equals the cue's side
#   "3_inconsistent" treated and group_2 equals the opposite side
#   "2_none"         otherwise
# Cues 1, 4 and 6 are on the "fuji" side; cues 2, 3 and 5 on "anti_fuji".
df[paste0("con_", seq_len(6))] <- Map(
  function(treated, cue_side) {
    other_side <- if (cue_side == "fuji") "anti_fuji" else "fuji"
    ifelse(df$group_2 == cue_side & treated == 1,
           "1_consistent",
           ifelse(df$group_2 == other_side & treated == 1,
                  "3_inconsistent",
                  "2_none"))
  },
  df[paste0("treat_", seq_len(6))],
  c("fuji", "anti_fuji", "anti_fuji", "fuji", "anti_fuji", "fuji")
)
# FUJIMORISTA/ANTI-FUJIMORISTA/NONE
# Labels a treatment indicator with the side of its cue.
#
# Args:
#   var:  treatment indicator vector (1 = treated).
#   char: label to use for treated rows.
#
# Returns:
#   `char` where var equals 1, 'none' where it does not, NA where var is NA.
con_var <- function(var, char) {
  is_treated <- var == 1
  ifelse(is_treated, char, "none")
}

# cond_<cue>: side of the cue for treated respondents, "none" otherwise.
# Cues 1, 4 and 6 are labelled "fuji"; cues 2, 3 and 5 are labelled "anti".
df[paste0("cond_", seq_len(6))] <- Map(
  con_var,
  df[paste0("treat_", seq_len(6))],
  c("fuji", "anti", "anti", "fuji", "anti", "fuji")
)

# TIME TO RESPONSE -------------------------------------------------------------

# Function
# Sums one timing measure of a cue across its six arms.
#
# Args:
#   cue:   cue number used in the column names (1..6 in this script).
#   click: timing measure: "First" or "Last" read the
#          cues_<cue><arm>_time_<click>.Click columns; "Page" reads the
#          cues_<cue><arm>_time_Page.Submit columns.
#   data:  data frame holding those columns for arms a..f.
#
# Returns:
#   An unnamed numeric vector with one value per row of `data`: the row sum of
#   the six columns after as.numeric(), ignoring NAs. A row with no timing in
#   any arm therefore yields 0, not NA.
#
# The two branches of the previous version differed only in the column
# suffix, so they are merged here; base subsetting replaces the superseded
# mutate_all() pipeline.
time_var <- function(cue, click, data) {
  suffix <- if (click != "Page") ".Click" else ".Submit"
  timing_cols <- paste0("cues_", cue, letters[seq_len(6)],
                        "_time_", click, suffix)
  timings <- lapply(data[timing_cols], as.numeric)
  unname(rowSums(do.call(cbind, timings), na.rm = TRUE))
}

# Timing columns time_<cue>_<type>, created in the order: all first-click
# columns, then all last-click columns, then all page-submit columns.
# First Click
df[paste0("time_", seq_len(6), "_f")] <-
  lapply(seq_len(6), time_var, click = "First", data = df)

# Last Click
df[paste0("time_", seq_len(6), "_l")] <-
  lapply(seq_len(6), time_var, click = "Last", data = df)

# Page
df[paste0("time_", seq_len(6), "_p")] <-
  lapply(seq_len(6), time_var, click = "Page", data = df)

# CUE ORDER --------------------------------------------------------------------

# Appends order_1 .. order_6 to df.
# Each FL_*_DO column holds a "|"-separated list (presumably the Qualtrics
# display order of the six cues for one arm -- confirm against the survey
# flow). Each list is split into six position columns (a1..a6 for FL_118_DO,
# ..., f1..f6 for FL_83_DO), empty strings become NA, everything except
# digits, "." and "-" is stripped, and for each position the first
# non-missing value across the six lists is kept.
#
# Changes from the previous version: across() replaces the deprecated funs()
# and the superseded mutate_all(), and the split columns are selected by name
# rather than by the positional range a1:f6, which also picked up any
# unrelated column lying between the FL_*_DO columns.
df <- cbind(
  df,
  df %>%
    separate(col = FL_118_DO, sep = "\\|", into = paste0("a", 1:6)) %>%
    separate(col = FL_111_DO, sep = "\\|", into = paste0("b", 1:6)) %>%
    separate(col = FL_104_DO, sep = "\\|", into = paste0("c", 1:6)) %>%
    separate(col = FL_97_DO, sep = "\\|", into = paste0("d", 1:6)) %>%
    separate(col = FL_90_DO, sep = "\\|", into = paste0("e", 1:6)) %>%
    separate(col = FL_83_DO, sep = "\\|", into = paste0("f", 1:6)) %>%
    select(all_of(paste0(rep(letters[1:6], each = 6), 1:6))) %>%
    # "" -> NA first, so that coalesce() below skips lists that were empty
    mutate(across(everything(),
                  ~ gsub("[^0-9.-]", "", na_if(.x, "")))) %>%
    mutate(order_1 = coalesce(a1, b1, c1, d1, e1, f1),
           order_2 = coalesce(a2, b2, c2, d2, e2, f2),
           order_3 = coalesce(a3, b3, c3, d3, e3, f3),
           order_4 = coalesce(a4, b4, c4, d4, e4, f4),
           order_5 = coalesce(a5, b5, c5, d5, e5, f5),
           order_6 = coalesce(a6, b6, c6, d6, e6, f6)) %>%
    select(order_1:order_6)
)


# OTHER COVARIATES -------------------------------------------------------------

# Covariates, added to df in the order female, region, age, educ, in_pol,
# trust_p.
#   female:  1 if demo_01 is "Femenino", 0 for any other non-missing value
#   region:  copy of demo_02
#   age:     demo_03 as numeric
#   educ:    demo_05 collapsed to 0 (below complete secondary), 1 (complete
#            secondary), 2 (technical/professional institute), 3 (university)
#   in_pol:  pb06 on a 0-4 scale; "No sabe" is coded 0
#   trust_p: trust_01f on a 0-6 scale (1-7 recode minus 1); "No sabe" and any
#            unlisted value are placed at the scale midpoint
df <- df %>%
  mutate(
    female = ifelse(demo_01 == "Femenino", 1, 0),
    region = demo_02,
    age = as.numeric(demo_03),
    educ = recode(demo_05,
                  "Sin instrucción" = 0,
                  "Primaria incompleta" = 0,
                  "Primaria completa" = 0,
                  "Secundaria incompleta" = 0,
                  "Secundaria completa" = 1,
                  "Instituto técnico o profesional incompleto" = 2,
                  "Instituto técnico o profesional completo" = 2,
                  "Universitaria incompleta" = 3,
                  "Universitaria completa" = 3),
    in_pol = recode(pb06,
                    "Para nada interesado" = 0,
                    "No muy interesado" = 1,
                    "Relativamente interesado" = 2,
                    "Interesado" = 3,
                    "Muy interesado" = 4,
                    "No sabe" = 0),
    trust_p = recode(trust_01f,
                     "1. No confío en absoluto" = 1,
                     "2" = 2,
                     "3" = 3,
                     "4. Indiferente" = 4,
                     "5" = 5,
                     "6" = 6,
                     "7. Confío mucho" = 7,
                     "No sabe" = 4,
                     .default = 4) - 1
  )

# RESHAPE DATA -----------------------------------------------------------------

# Long-format dataset: one row per respondent and experiment (exp = "1".."6").
# Four long tables are built from df -- responses plus respondent-level
# covariates, cue condition, response times, and display order -- and merged
# left to right on ResponseId and exp. In every pivot the column names are
# split on "_"; the first piece (the variable stem) is dropped and the second
# becomes `exp`.
df_e <- local({
  merge_keys <- c("ResponseId", "exp")

  # Cue responses together with the covariates carried to every row
  long_position <- df %>%
    select(ResponseId,
           change,
           cha_id,
           cha_st,
           party_id,
           pid_st,
           ido_id,
           ido_st,
           female,
           region,
           age,
           educ,
           trust_p,
           pb06, # Interest in politics without processing
           in_pol,
           Q_TotalDuration,
           cues_1:cues_6) %>%
    pivot_longer(cues_1:cues_6,
                 names_sep = '_',
                 names_to = c('cue', 'exp'),
                 values_to = 'position') %>%
    select(-cue)

  # Cue condition (fuji / anti / none)
  long_cond <- df %>%
    select(ResponseId,
           cond_1:cond_6) %>%
    pivot_longer(cond_1:cond_6,
                 names_sep = '_',
                 names_to = c('cue', 'exp'),
                 values_to = 'cond') %>%
    select(-cue)

  # Response times: long by experiment and type, then one column per type
  # (time_f, time_l, time_p)
  long_time <- df %>%
    select(ResponseId,
           time_1_f:time_6_p) %>%
    pivot_longer(time_1_f:time_6_p,
                 names_sep = '_',
                 names_to = c('cue', 'exp', "time_type"),
                 values_to = 'time') %>%
    select(-cue) %>%
    pivot_wider(names_from = time_type,
                values_from = time,
                names_prefix = "time_")

  # Display order, converted to numeric
  long_order <- df %>%
    select(ResponseId,
           order_1:order_6) %>%
    pivot_longer(order_1:order_6,
                 names_sep = '_',
                 names_to = c('cue', 'exp'),
                 values_to = 'order') %>%
    select(-cue) %>%
    mutate(order = as.numeric(order))

  Reduce(
    function(left, right) merge(left, right, by = merge_keys),
    list(long_position, long_cond, long_time, long_order)
  ) %>%
    filter(!(change == "" & exp == 2)) # Remove issues with experiment 2.
})

# Save data --------------------------------------------------------------------
# Wide (one row per respondent) and long (one row per respondent-experiment)
# datasets used by the subsequent analysis scripts.
saveRDS(object = df, file = "04_outputs/clean_dataset.rds")
saveRDS(object = df_e, file = "04_outputs/clean_dataset_reshaped.rds")